{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> Our labels are located in the `genres` column, stored as a comma-separated list of genre names for each game."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from __future__ import print_function,division\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((8520, 6),\n",
       " Index([u'appid', u'name', u'detailed_description', u'about_the_game',\n",
       "        u'categories', u'genres'],\n",
       "       dtype='object'))"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the Steam dataset from data.clean.csv; the relative path is resolved\n",
    "# against the kernel's working directory.\n",
    "df = pd.read_csv(\n",
    "    \"../../data/steam/data.clean.csv\"\n",
    ")\n",
    "\n",
    "# Quick sanity check: (rows, columns) together with the column labels.\n",
    "(df.shape, df.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>appid</th>\n",
       "      <th>name</th>\n",
       "      <th>detailed_description</th>\n",
       "      <th>about_the_game</th>\n",
       "      <th>categories</th>\n",
       "      <th>genres</th>\n",
       "      <th>num_labels</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>30</td>\n",
       "      <td>Day of Defeat</td>\n",
       "      <td>enlist in an intense brand of axis vs. allied ...</td>\n",
       "      <td>enlist in an intense brand of axis vs. allied ...</td>\n",
       "      <td>Multi-player,Valve Anti-Cheat enabled</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>40</td>\n",
       "      <td>Deathmatch Classic</td>\n",
       "      <td>enjoy fast-paced multiplayer gaming with death...</td>\n",
       "      <td>enjoy fast-paced multiplayer gaming with death...</td>\n",
       "      <td>Multi-player,Valve Anti-Cheat enabled</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>50</td>\n",
       "      <td>Half-Life: Opposing Force</td>\n",
       "      <td>return to the black mesa research facility as ...</td>\n",
       "      <td>return to the black mesa research facility as ...</td>\n",
       "      <td>Single-player,Multi-player,Valve Anti-Cheat en...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>60</td>\n",
       "      <td>Ricochet</td>\n",
       "      <td>a futuristic action game that challenges your ...</td>\n",
       "      <td>a futuristic action game that challenges your ...</td>\n",
       "      <td>Multi-player,Valve Anti-Cheat enabled</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>70</td>\n",
       "      <td>Half-Life</td>\n",
       "      <td>named game of the year by over 50 publications...</td>\n",
       "      <td>named game of the year by over 50 publications...</td>\n",
       "      <td>Single-player,Multi-player,Valve Anti-Cheat en...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>80</td>\n",
       "      <td>Counter-Strike: Condition Zero</td>\n",
       "      <td>with its extensive tour of duty campaign, a ne...</td>\n",
       "      <td>with its extensive tour of duty campaign, a ne...</td>\n",
       "      <td>Single-player,Multi-player,Valve Anti-Cheat en...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>130</td>\n",
       "      <td>Half-Life: Blue Shift</td>\n",
       "      <td>made by gearbox software and originally releas...</td>\n",
       "      <td>made by gearbox software and originally releas...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>220</td>\n",
       "      <td>Half-Life 2</td>\n",
       "      <td>1998. half-life sends a shock through the game...</td>\n",
       "      <td>1998. half-life sends a shock through the game...</td>\n",
       "      <td>Single-player,Steam Achievements,Steam Trading...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>240</td>\n",
       "      <td>Counter-Strike: Source</td>\n",
       "      <td>the next installment of the world's # 1 online...</td>\n",
       "      <td>the next installment of the world's # 1 online...</td>\n",
       "      <td>Multi-player,Cross-Platform Multiplayer,Steam ...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>280</td>\n",
       "      <td>Half-Life: Source</td>\n",
       "      <td>winner of over 50 game of the year awards, hal...</td>\n",
       "      <td>winner of over 50 game of the year awards, hal...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>300</td>\n",
       "      <td>Day of Defeat: Source</td>\n",
       "      <td>day of defeat offers intense online action gam...</td>\n",
       "      <td>day of defeat offers intense online action gam...</td>\n",
       "      <td>Multi-player,Cross-Platform Multiplayer,Steam ...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>320</td>\n",
       "      <td>Half-Life 2: Deathmatch</td>\n",
       "      <td>fast multiplayer action set in the half-life 2...</td>\n",
       "      <td>fast multiplayer action set in the half-life 2...</td>\n",
       "      <td>Multi-player,Valve Anti-Cheat enabled,Includes...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>340</td>\n",
       "      <td>Half-Life 2: Lost Coast</td>\n",
       "      <td>originally planned as a section of the highway...</td>\n",
       "      <td>originally planned as a section of the highway...</td>\n",
       "      <td>Single-player,Commentary available</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>360</td>\n",
       "      <td>Half-Life Deathmatch: Source</td>\n",
       "      <td>half-life deathmatch: source is a recreation o...</td>\n",
       "      <td>half-life deathmatch: source is a recreation o...</td>\n",
       "      <td>Multi-player,Valve Anti-Cheat enabled</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>380</td>\n",
       "      <td>Half-Life 2: Episode One</td>\n",
       "      <td>half-life 2 has sold over 4 million copies wor...</td>\n",
       "      <td>half-life 2 has sold over 4 million copies wor...</td>\n",
       "      <td>Single-player,Steam Achievements,Captions avai...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>400</td>\n",
       "      <td>Portal</td>\n",
       "      <td>portal is a new single player game from valve....</td>\n",
       "      <td>portal is a new single player game from valve....</td>\n",
       "      <td>Single-player,Steam Achievements,Captions avai...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>420</td>\n",
       "      <td>Half-Life 2: Episode Two</td>\n",
       "      <td>half-life 2: episode two is the second in a tr...</td>\n",
       "      <td>half-life 2: episode two is the second in a tr...</td>\n",
       "      <td>Single-player,Steam Achievements,Captions avai...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>440</td>\n",
       "      <td>Team Fortress 2</td>\n",
       "      <td>the tough break update is now available!about ...</td>\n",
       "      <td>\"the most fun you can have online\" - pc gamer ...</td>\n",
       "      <td>Multi-player,Cross-Platform Multiplayer,Steam ...</td>\n",
       "      <td>Action,Free to Play</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>500</td>\n",
       "      <td>Left 4 Dead</td>\n",
       "      <td>steam big pictureabout the gamefrom valve (the...</td>\n",
       "      <td>from valve (the creators of counter-strike, ha...</td>\n",
       "      <td>Single-player,Multi-player,Co-op,Steam Achieve...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>550</td>\n",
       "      <td>Left 4 Dead 2</td>\n",
       "      <td>set in the zombie apocalypse, left 4 dead 2 (l...</td>\n",
       "      <td>set in the zombie apocalypse, left 4 dead 2 (l...</td>\n",
       "      <td>Single-player,Multi-player,Co-op,Steam Achieve...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>570</td>\n",
       "      <td>Dota 2</td>\n",
       "      <td>dota is a competitive game of action and strat...</td>\n",
       "      <td>dota is a competitive game of action and strat...</td>\n",
       "      <td>Multi-player,Co-op,Steam Trading Cards,Steam W...</td>\n",
       "      <td>Action,Free to Play,Strategy</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>620</td>\n",
       "      <td>Portal 2</td>\n",
       "      <td>portal 2 draws from the award-winning formula ...</td>\n",
       "      <td>portal 2 draws from the award-winning formula ...</td>\n",
       "      <td>Single-player,Co-op,Steam Achievements,Full co...</td>\n",
       "      <td>Action,Adventure</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>630</td>\n",
       "      <td>Alien Swarm</td>\n",
       "      <td>alien swarm is a game and source sdk release f...</td>\n",
       "      <td>alien swarm is a game and source sdk release f...</td>\n",
       "      <td>Single-player,Multi-player,Co-op,Steam Achieve...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>730</td>\n",
       "      <td>Counter-Strike: Global Offensive</td>\n",
       "      <td>counter-strike: global offensive (cs: go) will...</td>\n",
       "      <td>counter-strike: global offensive (cs: go) will...</td>\n",
       "      <td>Multi-player,Steam Achievements,Full controlle...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>1002</td>\n",
       "      <td>Rag Doll Kung Fu</td>\n",
       "      <td>featuring a wide collection of single and mult...</td>\n",
       "      <td>featuring a wide collection of single and mult...</td>\n",
       "      <td>Single-player,Multi-player</td>\n",
       "      <td>Indie</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>1200</td>\n",
       "      <td>Red Orchestra: Ostfront 41-45</td>\n",
       "      <td>fight in the theatre of war that changed the w...</td>\n",
       "      <td>fight in the theatre of war that changed the w...</td>\n",
       "      <td>Multi-player,Steam Achievements,Valve Anti-Che...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>1250</td>\n",
       "      <td>Killing Floor</td>\n",
       "      <td>steam halloween sale - kf and all dlc - 75% of...</td>\n",
       "      <td>killing floor is a co-op survival horror fps s...</td>\n",
       "      <td>Single-player,Multi-player,Cross-Platform Mult...</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>1300</td>\n",
       "      <td>SiN Episodes: Emergence</td>\n",
       "      <td>you are john blade, commander of hardcorps, an...</td>\n",
       "      <td>you are john blade, commander of hardcorps, an...</td>\n",
       "      <td>Single-player,Stats</td>\n",
       "      <td>Action</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>1500</td>\n",
       "      <td>Darwinia</td>\n",
       "      <td>combining fast-paced action with strategic bat...</td>\n",
       "      <td>combining fast-paced action with strategic bat...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Indie,Strategy</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>1510</td>\n",
       "      <td>Uplink</td>\n",
       "      <td>you play an uplink agent who makes a living by...</td>\n",
       "      <td>you play an uplink agent who makes a living by...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Indie,Strategy</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8490</th>\n",
       "      <td>457230</td>\n",
       "      <td>Atlas Reactor VR Character Viewer</td>\n",
       "      <td>outwit to outlive in atlas reactor, the genre-...</td>\n",
       "      <td>outwit to outlive in atlas reactor, the genre-...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Strategy</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8491</th>\n",
       "      <td>457420</td>\n",
       "      <td>Surgeon Simulator VR: Meet The Medic</td>\n",
       "      <td>surgeon simulator vr: meet the medic be the me...</td>\n",
       "      <td>surgeon simulator vr: meet the medic be the me...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Action,Free to Play,Indie,Simulation</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8492</th>\n",
       "      <td>457440</td>\n",
       "      <td>Watch This!</td>\n",
       "      <td>watch this! is a first-person single player pl...</td>\n",
       "      <td>watch this! is a first-person single player pl...</td>\n",
       "      <td>Single-player,Steam Achievements,Partial Contr...</td>\n",
       "      <td>Action,Adventure,Indie</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8493</th>\n",
       "      <td>457450</td>\n",
       "      <td>Defend your Crypt</td>\n",
       "      <td>defend your crypt is a strategy and puzzle gam...</td>\n",
       "      <td>defend your crypt is a strategy and puzzle gam...</td>\n",
       "      <td>Single-player,Steam Achievements,Steam Trading...</td>\n",
       "      <td>Indie,Strategy</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8494</th>\n",
       "      <td>457480</td>\n",
       "      <td>Domino Sky</td>\n",
       "      <td>domino sky is a physics based game where you p...</td>\n",
       "      <td>domino sky is a physics based game where you p...</td>\n",
       "      <td>Single-player,Steam Achievements,Steam Trading...</td>\n",
       "      <td>Casual,Indie,Simulation,Strategy</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8495</th>\n",
       "      <td>457490</td>\n",
       "      <td>Forgotten, Not Lost - A Kinetic Novel</td>\n",
       "      <td>an old farmer lives with his wife - however, h...</td>\n",
       "      <td>an old farmer lives with his wife - however, h...</td>\n",
       "      <td>Single-player,Steam Trading Cards</td>\n",
       "      <td>Casual,RPG,Simulation</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8496</th>\n",
       "      <td>457520</td>\n",
       "      <td>Moustache Mountain</td>\n",
       "      <td>it is said, that an ancient civilization once ...</td>\n",
       "      <td>it is said, that an ancient civilization once ...</td>\n",
       "      <td>Single-player,Steam Achievements,Full controll...</td>\n",
       "      <td>Action,Casual,Indie</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8497</th>\n",
       "      <td>457530</td>\n",
       "      <td>My Lady</td>\n",
       "      <td>you are miss bauxmont, the heiress to the baux...</td>\n",
       "      <td>you are miss bauxmont, the heiress to the baux...</td>\n",
       "      <td>Single-player,Steam Achievements,Captions avai...</td>\n",
       "      <td>Casual,Indie,Simulation</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8498</th>\n",
       "      <td>457570</td>\n",
       "      <td>Camp Sunshine</td>\n",
       "      <td>camp sunshine is a 16-bit blood-soaked horror ...</td>\n",
       "      <td>camp sunshine is a 16-bit blood-soaked horror ...</td>\n",
       "      <td>Single-player,Steam Achievements,Partial Contr...</td>\n",
       "      <td>Action,Adventure,Casual,Indie,RPG</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8499</th>\n",
       "      <td>457580</td>\n",
       "      <td>The Visitor</td>\n",
       "      <td>the visitor is a vr only horror experience abo...</td>\n",
       "      <td>the visitor is a vr only horror experience abo...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Casual,Indie</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8500</th>\n",
       "      <td>457690</td>\n",
       "      <td>Hotel Blind</td>\n",
       "      <td>hotel blind is a simulator of a blind person i...</td>\n",
       "      <td>hotel blind is a simulator of a blind person i...</td>\n",
       "      <td>Single-player,Steam Achievements,Partial Contr...</td>\n",
       "      <td>Casual,Indie,Simulation</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8501</th>\n",
       "      <td>457710</td>\n",
       "      <td>Road Madness</td>\n",
       "      <td>in the 21st century, the world is occurred by ...</td>\n",
       "      <td>in the 21st century, the world is occurred by ...</td>\n",
       "      <td>Single-player,Steam Achievements,Steam Trading...</td>\n",
       "      <td>Action,Racing</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8502</th>\n",
       "      <td>457790</td>\n",
       "      <td>Capria: Magic of the Elements</td>\n",
       "      <td>capria: magic of the elements is a first-perso...</td>\n",
       "      <td>capria: magic of the elements is a first-perso...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Action,Casual,Indie,Early Access</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8503</th>\n",
       "      <td>457820</td>\n",
       "      <td>Outrage</td>\n",
       "      <td>outrage is a short cyberpunk dungeon crawler. ...</td>\n",
       "      <td>outrage is a short cyberpunk dungeon crawler. ...</td>\n",
       "      <td>Single-player,Steam Achievements,Steam Trading...</td>\n",
       "      <td>Adventure,Indie,RPG</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8504</th>\n",
       "      <td>457860</td>\n",
       "      <td>Apollo 11 VR</td>\n",
       "      <td>apollo 11 vr is the story of the greatest jour...</td>\n",
       "      <td>apollo 11 vr is the story of the greatest jour...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Adventure,Simulation</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8505</th>\n",
       "      <td>457870</td>\n",
       "      <td>Minigame Party VR</td>\n",
       "      <td>what is minigame party vr?minigame party vr is...</td>\n",
       "      <td>what is minigame party vr?minigame party vr is...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Action,Casual,Indie</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8506</th>\n",
       "      <td>457930</td>\n",
       "      <td>Starship: Nova Strike</td>\n",
       "      <td>we have been waiting for you commander. the dz...</td>\n",
       "      <td>we have been waiting for you commander. the dz...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Action,Casual,Indie</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8507</th>\n",
       "      <td>457940</td>\n",
       "      <td>Krog Wars</td>\n",
       "      <td>a new action game that can be played in 2d mod...</td>\n",
       "      <td>a new action game that can be played in 2d mod...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Action,Casual,Indie</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8508</th>\n",
       "      <td>457960</td>\n",
       "      <td>Holopoint</td>\n",
       "      <td>holopoint is pure archery madness. fight your ...</td>\n",
       "      <td>holopoint is pure archery madness. fight your ...</td>\n",
       "      <td>Single-player,Full controller support</td>\n",
       "      <td>Action,Indie,Simulation,Sports</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8509</th>\n",
       "      <td>458030</td>\n",
       "      <td>StarFringe: Adversus</td>\n",
       "      <td>this is the story about the beginning of the c...</td>\n",
       "      <td>this is the story about the beginning of the c...</td>\n",
       "      <td>Single-player,Steam Achievements</td>\n",
       "      <td>Strategy,Early Access</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8510</th>\n",
       "      <td>458290</td>\n",
       "      <td>Space Bit Attack</td>\n",
       "      <td>notice: made for vrspace bit attack was made w...</td>\n",
       "      <td>the galaxy needs you! transport yourself into ...</td>\n",
       "      <td>Single-player,Full controller support</td>\n",
       "      <td>Action,Indie</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8511</th>\n",
       "      <td>458370</td>\n",
       "      <td>VR Baseball - Home Run Derby</td>\n",
       "      <td>there is no feeling greater than stepping up t...</td>\n",
       "      <td>there is no feeling greater than stepping up t...</td>\n",
       "      <td>Single-player,Full controller support,VR Support</td>\n",
       "      <td>Indie,Sports</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8512</th>\n",
       "      <td>458420</td>\n",
       "      <td>Last Heroes 3</td>\n",
       "      <td>presentationlast heroes is an rpg developed by...</td>\n",
       "      <td>presentationlast heroes is an rpg developed by...</td>\n",
       "      <td>Single-player,Full controller support,Steam Tr...</td>\n",
       "      <td>Adventure,Casual,Indie,RPG,Strategy</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8513</th>\n",
       "      <td>458700</td>\n",
       "      <td>Cursor Challenge</td>\n",
       "      <td>do you want to test your skills and your refle...</td>\n",
       "      <td>do you want to test your skills and your refle...</td>\n",
       "      <td>Single-player,Co-op</td>\n",
       "      <td>Casual,Indie</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8514</th>\n",
       "      <td>458900</td>\n",
       "      <td>Julai</td>\n",
       "      <td>julai - is an arcade flight shooter.the action...</td>\n",
       "      <td>julai - is an arcade flight shooter.the action...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Action,Casual,Indie,Simulation</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8515</th>\n",
       "      <td>459100</td>\n",
       "      <td>Epsilon corp.</td>\n",
       "      <td>epsilon is a secret community. there are a lot...</td>\n",
       "      <td>epsilon is a secret community. there are a lot...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Action,Adventure,Indie</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8516</th>\n",
       "      <td>459260</td>\n",
       "      <td>Bowslinger</td>\n",
       "      <td>disclaimer: this game requires an htc vive to ...</td>\n",
       "      <td>disclaimer: this game requires an htc vive to ...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Casual,Indie,Sports</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8517</th>\n",
       "      <td>459310</td>\n",
       "      <td>The Hero Project: Redemption Season</td>\n",
       "      <td>america's #1 reality show for heroes is back f...</td>\n",
       "      <td>america's #1 reality show for heroes is back f...</td>\n",
       "      <td>Single-player,Steam Achievements,Captions avai...</td>\n",
       "      <td>Adventure,Indie,RPG</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8518</th>\n",
       "      <td>459630</td>\n",
       "      <td>R.C. Bot Inc.</td>\n",
       "      <td>energy and money: iridium. the race has begun!...</td>\n",
       "      <td>energy and money: iridium. the race has begun!...</td>\n",
       "      <td>Single-player,Steam Achievements,Full controll...</td>\n",
       "      <td>Action,Casual,Indie,Simulation,Strategy</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8519</th>\n",
       "      <td>460150</td>\n",
       "      <td>One Last Chance</td>\n",
       "      <td>do you remember that high school crush you nev...</td>\n",
       "      <td>do you remember that high school crush you nev...</td>\n",
       "      <td>Single-player</td>\n",
       "      <td>Casual,Indie,Simulation</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8520 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       appid                                   name  \\\n",
       "0         30                          Day of Defeat   \n",
       "1         40                     Deathmatch Classic   \n",
       "2         50              Half-Life: Opposing Force   \n",
       "3         60                               Ricochet   \n",
       "4         70                              Half-Life   \n",
       "5         80         Counter-Strike: Condition Zero   \n",
       "6        130                  Half-Life: Blue Shift   \n",
       "7        220                            Half-Life 2   \n",
       "8        240                 Counter-Strike: Source   \n",
       "9        280                      Half-Life: Source   \n",
       "10       300                  Day of Defeat: Source   \n",
       "11       320                Half-Life 2: Deathmatch   \n",
       "12       340                Half-Life 2: Lost Coast   \n",
       "13       360           Half-Life Deathmatch: Source   \n",
       "14       380               Half-Life 2: Episode One   \n",
       "15       400                                 Portal   \n",
       "16       420               Half-Life 2: Episode Two   \n",
       "17       440                        Team Fortress 2   \n",
       "18       500                            Left 4 Dead   \n",
       "19       550                          Left 4 Dead 2   \n",
       "20       570                                 Dota 2   \n",
       "21       620                               Portal 2   \n",
       "22       630                            Alien Swarm   \n",
       "23       730       Counter-Strike: Global Offensive   \n",
       "24      1002                       Rag Doll Kung Fu   \n",
       "25      1200          Red Orchestra: Ostfront 41-45   \n",
       "26      1250                          Killing Floor   \n",
       "27      1300                SiN Episodes: Emergence   \n",
       "28      1500                               Darwinia   \n",
       "29      1510                                 Uplink   \n",
       "...      ...                                    ...   \n",
       "8490  457230      Atlas Reactor VR Character Viewer   \n",
       "8491  457420   Surgeon Simulator VR: Meet The Medic   \n",
       "8492  457440                            Watch This!   \n",
       "8493  457450                      Defend your Crypt   \n",
       "8494  457480                             Domino Sky   \n",
       "8495  457490  Forgotten, Not Lost - A Kinetic Novel   \n",
       "8496  457520                     Moustache Mountain   \n",
       "8497  457530                                My Lady   \n",
       "8498  457570                          Camp Sunshine   \n",
       "8499  457580                            The Visitor   \n",
       "8500  457690                            Hotel Blind   \n",
       "8501  457710                           Road Madness   \n",
       "8502  457790          Capria: Magic of the Elements   \n",
       "8503  457820                                Outrage   \n",
       "8504  457860                           Apollo 11 VR   \n",
       "8505  457870                      Minigame Party VR   \n",
       "8506  457930                  Starship: Nova Strike   \n",
       "8507  457940                              Krog Wars   \n",
       "8508  457960                              Holopoint   \n",
       "8509  458030                   StarFringe: Adversus   \n",
       "8510  458290                       Space Bit Attack   \n",
       "8511  458370           VR Baseball - Home Run Derby   \n",
       "8512  458420                          Last Heroes 3   \n",
       "8513  458700                       Cursor Challenge   \n",
       "8514  458900                                  Julai   \n",
       "8515  459100                          Epsilon corp.   \n",
       "8516  459260                             Bowslinger   \n",
       "8517  459310    The Hero Project: Redemption Season   \n",
       "8518  459630                          R.C. Bot Inc.   \n",
       "8519  460150                        One Last Chance   \n",
       "\n",
       "                                   detailed_description  \\\n",
       "0     enlist in an intense brand of axis vs. allied ...   \n",
       "1     enjoy fast-paced multiplayer gaming with death...   \n",
       "2     return to the black mesa research facility as ...   \n",
       "3     a futuristic action game that challenges your ...   \n",
       "4     named game of the year by over 50 publications...   \n",
       "5     with its extensive tour of duty campaign, a ne...   \n",
       "6     made by gearbox software and originally releas...   \n",
       "7     1998. half-life sends a shock through the game...   \n",
       "8     the next installment of the world's # 1 online...   \n",
       "9     winner of over 50 game of the year awards, hal...   \n",
       "10    day of defeat offers intense online action gam...   \n",
       "11    fast multiplayer action set in the half-life 2...   \n",
       "12    originally planned as a section of the highway...   \n",
       "13    half-life deathmatch: source is a recreation o...   \n",
       "14    half-life 2 has sold over 4 million copies wor...   \n",
       "15    portal is a new single player game from valve....   \n",
       "16    half-life 2: episode two is the second in a tr...   \n",
       "17    the tough break update is now available!about ...   \n",
       "18    steam big pictureabout the gamefrom valve (the...   \n",
       "19    set in the zombie apocalypse, left 4 dead 2 (l...   \n",
       "20    dota is a competitive game of action and strat...   \n",
       "21    portal 2 draws from the award-winning formula ...   \n",
       "22    alien swarm is a game and source sdk release f...   \n",
       "23    counter-strike: global offensive (cs: go) will...   \n",
       "24    featuring a wide collection of single and mult...   \n",
       "25    fight in the theatre of war that changed the w...   \n",
       "26    steam halloween sale - kf and all dlc - 75% of...   \n",
       "27    you are john blade, commander of hardcorps, an...   \n",
       "28    combining fast-paced action with strategic bat...   \n",
       "29    you play an uplink agent who makes a living by...   \n",
       "...                                                 ...   \n",
       "8490  outwit to outlive in atlas reactor, the genre-...   \n",
       "8491  surgeon simulator vr: meet the medic be the me...   \n",
       "8492  watch this! is a first-person single player pl...   \n",
       "8493  defend your crypt is a strategy and puzzle gam...   \n",
       "8494  domino sky is a physics based game where you p...   \n",
       "8495  an old farmer lives with his wife - however, h...   \n",
       "8496  it is said, that an ancient civilization once ...   \n",
       "8497  you are miss bauxmont, the heiress to the baux...   \n",
       "8498  camp sunshine is a 16-bit blood-soaked horror ...   \n",
       "8499  the visitor is a vr only horror experience abo...   \n",
       "8500  hotel blind is a simulator of a blind person i...   \n",
       "8501  in the 21st century, the world is occurred by ...   \n",
       "8502  capria: magic of the elements is a first-perso...   \n",
       "8503  outrage is a short cyberpunk dungeon crawler. ...   \n",
       "8504  apollo 11 vr is the story of the greatest jour...   \n",
       "8505  what is minigame party vr?minigame party vr is...   \n",
       "8506  we have been waiting for you commander. the dz...   \n",
       "8507  a new action game that can be played in 2d mod...   \n",
       "8508  holopoint is pure archery madness. fight your ...   \n",
       "8509  this is the story about the beginning of the c...   \n",
       "8510  notice: made for vrspace bit attack was made w...   \n",
       "8511  there is no feeling greater than stepping up t...   \n",
       "8512  presentationlast heroes is an rpg developed by...   \n",
       "8513  do you want to test your skills and your refle...   \n",
       "8514  julai - is an arcade flight shooter.the action...   \n",
       "8515  epsilon is a secret community. there are a lot...   \n",
       "8516  disclaimer: this game requires an htc vive to ...   \n",
       "8517  america's #1 reality show for heroes is back f...   \n",
       "8518  energy and money: iridium. the race has begun!...   \n",
       "8519  do you remember that high school crush you nev...   \n",
       "\n",
       "                                         about_the_game  \\\n",
       "0     enlist in an intense brand of axis vs. allied ...   \n",
       "1     enjoy fast-paced multiplayer gaming with death...   \n",
       "2     return to the black mesa research facility as ...   \n",
       "3     a futuristic action game that challenges your ...   \n",
       "4     named game of the year by over 50 publications...   \n",
       "5     with its extensive tour of duty campaign, a ne...   \n",
       "6     made by gearbox software and originally releas...   \n",
       "7     1998. half-life sends a shock through the game...   \n",
       "8     the next installment of the world's # 1 online...   \n",
       "9     winner of over 50 game of the year awards, hal...   \n",
       "10    day of defeat offers intense online action gam...   \n",
       "11    fast multiplayer action set in the half-life 2...   \n",
       "12    originally planned as a section of the highway...   \n",
       "13    half-life deathmatch: source is a recreation o...   \n",
       "14    half-life 2 has sold over 4 million copies wor...   \n",
       "15    portal is a new single player game from valve....   \n",
       "16    half-life 2: episode two is the second in a tr...   \n",
       "17    \"the most fun you can have online\" - pc gamer ...   \n",
       "18    from valve (the creators of counter-strike, ha...   \n",
       "19    set in the zombie apocalypse, left 4 dead 2 (l...   \n",
       "20    dota is a competitive game of action and strat...   \n",
       "21    portal 2 draws from the award-winning formula ...   \n",
       "22    alien swarm is a game and source sdk release f...   \n",
       "23    counter-strike: global offensive (cs: go) will...   \n",
       "24    featuring a wide collection of single and mult...   \n",
       "25    fight in the theatre of war that changed the w...   \n",
       "26    killing floor is a co-op survival horror fps s...   \n",
       "27    you are john blade, commander of hardcorps, an...   \n",
       "28    combining fast-paced action with strategic bat...   \n",
       "29    you play an uplink agent who makes a living by...   \n",
       "...                                                 ...   \n",
       "8490  outwit to outlive in atlas reactor, the genre-...   \n",
       "8491  surgeon simulator vr: meet the medic be the me...   \n",
       "8492  watch this! is a first-person single player pl...   \n",
       "8493  defend your crypt is a strategy and puzzle gam...   \n",
       "8494  domino sky is a physics based game where you p...   \n",
       "8495  an old farmer lives with his wife - however, h...   \n",
       "8496  it is said, that an ancient civilization once ...   \n",
       "8497  you are miss bauxmont, the heiress to the baux...   \n",
       "8498  camp sunshine is a 16-bit blood-soaked horror ...   \n",
       "8499  the visitor is a vr only horror experience abo...   \n",
       "8500  hotel blind is a simulator of a blind person i...   \n",
       "8501  in the 21st century, the world is occurred by ...   \n",
       "8502  capria: magic of the elements is a first-perso...   \n",
       "8503  outrage is a short cyberpunk dungeon crawler. ...   \n",
       "8504  apollo 11 vr is the story of the greatest jour...   \n",
       "8505  what is minigame party vr?minigame party vr is...   \n",
       "8506  we have been waiting for you commander. the dz...   \n",
       "8507  a new action game that can be played in 2d mod...   \n",
       "8508  holopoint is pure archery madness. fight your ...   \n",
       "8509  this is the story about the beginning of the c...   \n",
       "8510  the galaxy needs you! transport yourself into ...   \n",
       "8511  there is no feeling greater than stepping up t...   \n",
       "8512  presentationlast heroes is an rpg developed by...   \n",
       "8513  do you want to test your skills and your refle...   \n",
       "8514  julai - is an arcade flight shooter.the action...   \n",
       "8515  epsilon is a secret community. there are a lot...   \n",
       "8516  disclaimer: this game requires an htc vive to ...   \n",
       "8517  america's #1 reality show for heroes is back f...   \n",
       "8518  energy and money: iridium. the race has begun!...   \n",
       "8519  do you remember that high school crush you nev...   \n",
       "\n",
       "                                             categories  \\\n",
       "0                 Multi-player,Valve Anti-Cheat enabled   \n",
       "1                 Multi-player,Valve Anti-Cheat enabled   \n",
       "2     Single-player,Multi-player,Valve Anti-Cheat en...   \n",
       "3                 Multi-player,Valve Anti-Cheat enabled   \n",
       "4     Single-player,Multi-player,Valve Anti-Cheat en...   \n",
       "5     Single-player,Multi-player,Valve Anti-Cheat en...   \n",
       "6                                         Single-player   \n",
       "7     Single-player,Steam Achievements,Steam Trading...   \n",
       "8     Multi-player,Cross-Platform Multiplayer,Steam ...   \n",
       "9                                         Single-player   \n",
       "10    Multi-player,Cross-Platform Multiplayer,Steam ...   \n",
       "11    Multi-player,Valve Anti-Cheat enabled,Includes...   \n",
       "12                   Single-player,Commentary available   \n",
       "13                Multi-player,Valve Anti-Cheat enabled   \n",
       "14    Single-player,Steam Achievements,Captions avai...   \n",
       "15    Single-player,Steam Achievements,Captions avai...   \n",
       "16    Single-player,Steam Achievements,Captions avai...   \n",
       "17    Multi-player,Cross-Platform Multiplayer,Steam ...   \n",
       "18    Single-player,Multi-player,Co-op,Steam Achieve...   \n",
       "19    Single-player,Multi-player,Co-op,Steam Achieve...   \n",
       "20    Multi-player,Co-op,Steam Trading Cards,Steam W...   \n",
       "21    Single-player,Co-op,Steam Achievements,Full co...   \n",
       "22    Single-player,Multi-player,Co-op,Steam Achieve...   \n",
       "23    Multi-player,Steam Achievements,Full controlle...   \n",
       "24                           Single-player,Multi-player   \n",
       "25    Multi-player,Steam Achievements,Valve Anti-Che...   \n",
       "26    Single-player,Multi-player,Cross-Platform Mult...   \n",
       "27                                  Single-player,Stats   \n",
       "28                                        Single-player   \n",
       "29                                        Single-player   \n",
       "...                                                 ...   \n",
       "8490                                      Single-player   \n",
       "8491                                      Single-player   \n",
       "8492  Single-player,Steam Achievements,Partial Contr...   \n",
       "8493  Single-player,Steam Achievements,Steam Trading...   \n",
       "8494  Single-player,Steam Achievements,Steam Trading...   \n",
       "8495                  Single-player,Steam Trading Cards   \n",
       "8496  Single-player,Steam Achievements,Full controll...   \n",
       "8497  Single-player,Steam Achievements,Captions avai...   \n",
       "8498  Single-player,Steam Achievements,Partial Contr...   \n",
       "8499                                      Single-player   \n",
       "8500  Single-player,Steam Achievements,Partial Contr...   \n",
       "8501  Single-player,Steam Achievements,Steam Trading...   \n",
       "8502                                      Single-player   \n",
       "8503  Single-player,Steam Achievements,Steam Trading...   \n",
       "8504                                      Single-player   \n",
       "8505                                      Single-player   \n",
       "8506                                      Single-player   \n",
       "8507                                      Single-player   \n",
       "8508              Single-player,Full controller support   \n",
       "8509                   Single-player,Steam Achievements   \n",
       "8510              Single-player,Full controller support   \n",
       "8511   Single-player,Full controller support,VR Support   \n",
       "8512  Single-player,Full controller support,Steam Tr...   \n",
       "8513                                Single-player,Co-op   \n",
       "8514                                      Single-player   \n",
       "8515                                      Single-player   \n",
       "8516                                      Single-player   \n",
       "8517  Single-player,Steam Achievements,Captions avai...   \n",
       "8518  Single-player,Steam Achievements,Full controll...   \n",
       "8519                                      Single-player   \n",
       "\n",
       "                                       genres  num_labels  \n",
       "0                                      Action           1  \n",
       "1                                      Action           1  \n",
       "2                                      Action           1  \n",
       "3                                      Action           1  \n",
       "4                                      Action           1  \n",
       "5                                      Action           1  \n",
       "6                                      Action           1  \n",
       "7                                      Action           1  \n",
       "8                                      Action           1  \n",
       "9                                      Action           1  \n",
       "10                                     Action           1  \n",
       "11                                     Action           1  \n",
       "12                                     Action           1  \n",
       "13                                     Action           1  \n",
       "14                                     Action           1  \n",
       "15                                     Action           1  \n",
       "16                                     Action           1  \n",
       "17                        Action,Free to Play           2  \n",
       "18                                     Action           1  \n",
       "19                                     Action           1  \n",
       "20               Action,Free to Play,Strategy           3  \n",
       "21                           Action,Adventure           2  \n",
       "22                                     Action           1  \n",
       "23                                     Action           1  \n",
       "24                                      Indie           1  \n",
       "25                                     Action           1  \n",
       "26                                     Action           1  \n",
       "27                                     Action           1  \n",
       "28                             Indie,Strategy           2  \n",
       "29                             Indie,Strategy           2  \n",
       "...                                       ...         ...  \n",
       "8490                                 Strategy           1  \n",
       "8491     Action,Free to Play,Indie,Simulation           4  \n",
       "8492                   Action,Adventure,Indie           3  \n",
       "8493                           Indie,Strategy           2  \n",
       "8494         Casual,Indie,Simulation,Strategy           4  \n",
       "8495                    Casual,RPG,Simulation           3  \n",
       "8496                      Action,Casual,Indie           3  \n",
       "8497                  Casual,Indie,Simulation           3  \n",
       "8498        Action,Adventure,Casual,Indie,RPG           5  \n",
       "8499                             Casual,Indie           2  \n",
       "8500                  Casual,Indie,Simulation           3  \n",
       "8501                            Action,Racing           2  \n",
       "8502         Action,Casual,Indie,Early Access           4  \n",
       "8503                      Adventure,Indie,RPG           3  \n",
       "8504                     Adventure,Simulation           2  \n",
       "8505                      Action,Casual,Indie           3  \n",
       "8506                      Action,Casual,Indie           3  \n",
       "8507                      Action,Casual,Indie           3  \n",
       "8508           Action,Indie,Simulation,Sports           4  \n",
       "8509                    Strategy,Early Access           2  \n",
       "8510                             Action,Indie           2  \n",
       "8511                             Indie,Sports           2  \n",
       "8512      Adventure,Casual,Indie,RPG,Strategy           5  \n",
       "8513                             Casual,Indie           2  \n",
       "8514           Action,Casual,Indie,Simulation           4  \n",
       "8515                   Action,Adventure,Indie           3  \n",
       "8516                      Casual,Indie,Sports           3  \n",
       "8517                      Adventure,Indie,RPG           3  \n",
       "8518  Action,Casual,Indie,Simulation,Strategy           5  \n",
       "8519                  Casual,Indie,Simulation           3  \n",
       "\n",
       "[8520 rows x 7 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of labels per game: count the comma-separated entries in 'genres'.\n",
    "df['num_labels'] = df['genres'].map(lambda genres_csv: len(genres_csv.split(',')))\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.4605633802816902"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label cardinality: the mean of the per-row label counts in 'num_labels'.\n",
    "labels_per_row = df['num_labels']\n",
    "cardinality = labels_per_row.mean()\n",
    "cardinality"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(23,\n",
       " {'Accounting',\n",
       "  'Action',\n",
       "  'Adventure',\n",
       "  'Animation & Modeling',\n",
       "  'Audio Production',\n",
       "  'Casual',\n",
       "  'Design & Illustration',\n",
       "  'Early Access',\n",
       "  'Education',\n",
       "  'Free to Play',\n",
       "  'Indie',\n",
       "  'Massively Multiplayer',\n",
       "  'Photo Editing',\n",
       "  'RPG',\n",
       "  'Racing',\n",
       "  'Simulation',\n",
       "  'Software Training',\n",
       "  'Sports',\n",
       "  'Strategy',\n",
       "  'Uncategorized',\n",
       "  'Utilities',\n",
       "  'Video Production',\n",
       "  'Web Publishing'})"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Turn each comma-separated 'genres' string into the set of its labels.\n",
    "df['labelset'] = df['genres'].map(lambda txt: set(txt.split(',')))\n",
    "\n",
    "# Pool the labels of every row into one set. The per-row sets live in the\n",
    "# 'labelset' column built above; no visible cell creates a 'labels' column, so\n",
    "# reading df['labels'] raised a KeyError on a fresh kernel.\n",
    "unique_labels = {label for labelset in df['labelset'] for label in labelset}\n",
    "\n",
    "# L is the number of distinct labels in the dataset.\n",
    "L = len(unique_labels)\n",
    "L,unique_labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.10698101653398653"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label density: label cardinality divided by the number of distinct labels (L).\n",
    "num_distinct_labels = L\n",
    "label_density = cardinality / num_distinct_labels\n",
    "label_density"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "585"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# number of distinct labelsets\n",
    "\n",
    "# Key each row's labelset by the string form of its sorted labels, so that equal\n",
    "# sets always produce the same key regardless of set iteration order.\n",
    "labelset_keys = df['labelset'].map(lambda labelset: str(sorted(labelset)))\n",
    "\n",
    "# unique() keeps one copy of each key, in order of first appearance, using a hash\n",
    "# table; this replaces the per-row membership scan of a growing Python list.\n",
    "distinct_labelsets = labelset_keys.unique().tolist()\n",
    "\n",
    "num_distinct_labelsets = len(distinct_labelsets)\n",
    "num_distinct_labelsets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "237"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# number of labelsets appearing in a single instance\n",
    "\n",
    "# Key every row by the string form of its sorted labelset, then count how many\n",
    "# rows share each key. Computing the keys here keeps the cell independent of\n",
    "# variables left behind by other cells.\n",
    "labelset_keys = df['labelset'].map(lambda labelset: str(sorted(labelset)))\n",
    "occurrences = labelset_keys.value_counts().to_dict()\n",
    "\n",
    "# A labelset is a one-hit wonder when exactly one row carries it. values() is\n",
    "# used rather than the Python-2-only iteritems() so the cell runs on 2 and 3.\n",
    "one_hit_wonders = sum(1 for count in occurrences.values() if count == 1)\n",
    "\n",
    "one_hit_wonders"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
